{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "19076b00",
   "metadata": {},
   "outputs": [],
   "source": [
    "# !wget https://huggingface.co/datasets/mesolitica/malaysian-benchmark/resolve/main/iium-coffession-en.json"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "8c160b3b",
   "metadata": {},
   "outputs": [],
   "source": [
    "from tqdm import tqdm\n",
    "import requests\n",
    "import os\n",
    "import json"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "b7b5e3b4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1000"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "with open('iium-coffession-en.json') as fopen:\n",
    "    d = json.load(fopen)\n",
    "    \n",
    "len(d)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "873233ac",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'from': \"Masalah jerawat akan menjadi lebih teruk kalau guna pencuci muka bersifat alkali/high pH, dan bila guna pencuci muka yang low pH, acne akan berkurangan dan kulit akan improve (Kalau u all rajin, boleh la baca study Korting et al, tajuk dia 'The Influence of the Regular Use of a Soap or an Acidic Syndet Bar on Pre-Acne', tak pasti jumpa ke tak kalau Google, tapi dalam blog yang I baca ni ada direct link pergi ke pdf file study tu).\",\n",
       " 'to': \"Acne problems can be worse if you use alkaline / high pH cleanser, and when you use low pH cleanser, acne will decrease and your skin will improve (If you are all diligent, you can read the study Korting et al. Influence of the Regular Use of a Soap or an Acidic Syndet Bar on Pre-Acne ', not sure if you can find it through Google, but in the blog I read there is a direct link to the pdf file study).\"}"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "d[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "8a222587",
   "metadata": {},
   "outputs": [],
   "source": [
    "!mkdir google-translate-iium-en"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "0b246de3",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|█████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [45:52<00:00,  2.75s/it]\n"
     ]
    }
   ],
   "source": [
    "for i in tqdm(range(len(d))):\n",
    "    filename = os.path.join('google-translate-iium-en', f'{i}.json')\n",
    "    \n",
    "    if os.path.exists(filename):\n",
    "        continue\n",
    "    \n",
    "    headers = {\n",
    "        'accept': 'application/json',\n",
    "        'Content-Type': 'application/json',\n",
    "    }\n",
    "    \n",
    "    l = d[i]['from']\n",
    "\n",
    "    json_data = {\n",
    "        'text': l,\n",
    "    }\n",
    "\n",
    "    response = requests.post('http://localhost:8003/translate', headers=headers, json=json_data)\n",
    "    r = response.json()\n",
    "    with open(filename, 'w') as fopen:\n",
    "        json.dump({'text': l, 'r': r}, fopen)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "48ee2392",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sacrebleu.metrics import BLEU, CHRF, TER\n",
    "\n",
    "chrf = CHRF(word_order = 2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "57947d80",
   "metadata": {},
   "outputs": [],
   "source": [
    "predicted = []\n",
    "for i in range(len(d)):\n",
    "    filename = os.path.join('google-translate-iium-en', f'{i}.json')\n",
    "    with open(filename) as fopen:\n",
    "        d_ = json.load(fopen)\n",
    "\n",
    "    predicted.append(d_['r']['result'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "68b99c6f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "chrF2++ = 53.83"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "score = chrf.corpus_score(predicted, [[d_['to'] for d_ in d]])\n",
    "score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9dd6d7f0",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
